/* This program reads the full admin data and creates a dataset for the placebo
 sample centered around age 50 */

***** Project folders: raw inputs, cleaned data, and output
local dir_raw 		"~/Dropbox/Retirement gaming/raw"
local dir_clean 	"~/Dropbox/Retirement gaming/clean"
local dir_output 	"~/Dropbox/Retirement gaming/output/dataverse"

* File name under which the finished placebo sample is saved in dir_clean
local dataname "data_placebo.dta" 

* Load the cleaned administrative data
use "`dir_clean'/admindata.dta", clear

* Sort the rows by i, t, j and move those three variables to the front
sort i t j
order i t j 

* Birth date (Fnac, displayed as a daily date) and its monthly equivalent
format Fnac %td
generate birth_month = mofd(Fnac)

* Calendar month of the 50th birthday, the reference age of this sample
generate refbday_month = birth_month + (12*50)
format birth_month refbday_month %tm

* Months elapsed since the 50th birthday (negative before it)
generate agemonths_centered = t - refbday_month
* Age in months (not centered)
generate agemonths = agemonths_centered + 12*50
* Age in years (not centered, fractional)
generate age = agemonths / 12

* Keep only contributors flagged as civil servants (aportaci==2)
* NOTE(review): aportaci_min is dropped before aportaci is referenced; if
* aportaci is a variable-name abbreviation this order matters - confirm.
drop aportaci_min 
cap drop aux
generate civilserv = (aportaci == 2) // contributes to social security as civil servant
drop if civilserv != 1

* Keep sexo==1 only (the original note describes this as dropping females)
keep if sexo == 1

* Drop records by vf_min code:
*   103    = people working while already retired
*   97, 98 = people reported with no service to the firm
drop if inlist(vf_min, 97, 98, 103)

* RETIREMENT
* Month of the exit date Fegr
generate tegr = mofd(Fegr)
format tegr %tm

* Flag the exit month when causal_5==1 (reported as leaving due to retirement)
generate retirnow = (t == tegr & causal_5 == 1)

* First month flagged as retirement for each person (missing if never flagged)
cap drop aux
generate aux = t if retirnow == 1
bysort i: egen tretir = min(aux)
drop aux
format tretir %tm

* Months strictly after the first retirement month. A missing tretir compares
* as larger than any t, so people never flagged as retiring get 0 throughout.
generate postretir = (t > tretir)

* Only the first retirement event counts as a retirement
replace retirnow = 0 if postretir == 1
bysort i: egen retir_insample = max(retirnow)

* Drop people with employment (positive, non-missing W) after retirement
generate flag = (W > 0 & W < . & postretir == 1)
bysort i: egen iflag = max(flag)
drop if iflag == 1
drop flag iflag

* Status indicators: self-employed (status_1==1) and employed (status_3==1)
generate self_empl = (status_1 == 1)
generate empl = (status_3 == 1)

* Drop observations with no earnings reported (W missing or zero)
drop if W == . | W == 0

* Keep only salaried work observations (tipREM_min==1)
generate sample_salary = (tipREM_min == 1)
drop if sample_salary != 1


* Drop earnings outliers
* Upper cutoff: 95th percentile of W in the current data, stored in a scalar
* right after summarize so it does not depend on r() staying untouched.
sum W, det 
tempname w_p95
scalar `w_p95' = r(p95)

* Lower cutoff is a fixed value: the bound used in the main sample
generate wlow = (W < 3.604606)
generate whigh = (W > `w_p95')

* Person-level markers: anywout==1 if the person ever has an outlier month
bysort i: egen anylow = max(wlow)
bysort i: egen anyhigh = max(whigh)
generate anywout = (anylow == 1 | anyhigh == 1)

* Only the outlier months themselves are removed; anywout stays in the data
keep if wlow != 1 & whigh != 1
drop wlow whigh anylow anyhigh

* Firm size categories
* Bins ndep (firm size, per the label text below) into six classes and
* attaches the size_cat value label.
* Stata orders missing values above every number, so the open-ended top bin
* must exclude them explicitly; otherwise observations with missing ndep
* would be coded as "Large 250 plus". With the guard they stay missing.
label define size_cat 0 "Micro less than 5 " 1 "Micro 5-9" 2 "Small 10-19" 3 "Small 20-49" 4 "Medium 50-249" 5 "Large 250 plus"
foreach X in ndep {
	g 		`X'_cat=0 if `X' <5 // micro 1 less than 5
	replace `X'_cat=1 if `X'>=5 & `X'<10 // micro 2
	replace `X'_cat=2 if `X'>=10 & `X'<20 // small 1
	replace `X'_cat=3 if `X'>=20 & `X'<50 // small 2
	replace `X'_cat=4 if `X'>=50 & `X'<250 // medium
	replace `X'_cat=5 if `X'>=250 & !missing(`X') // large; missing size stays missing
	label values `X'_cat size_cat
}

* Calendar year of the monthly date t (replaces any existing year variable)
capture drop year
generate year = year(dofm(t))


*** Reduce the data to one record per person-month (i t): keep the main job
tempvar ndup anysal topW

* Step 1: where a person-month has several records and at least one is
* salaried, drop the non-salaried ones
duplicates tag i t, g(`ndup')
bysort i t: egen `anysal' = max(sample_salary)
drop if `ndup' > 0 & `anysal' == 1 & sample_salary == 0
drop `ndup' `anysal'

* Step 2: among remaining multiples, keep the record(s) with the highest W
duplicates tag i t, g(`ndup')
bysort i t: egen `topW' = max(W)
drop if `ndup' > 0 & W < `topW'
drop `ndup' `topW'

* Step 3: records still tied within a person-month share the same W; keep
* one of them. The tag generated here is only reported, never used.
duplicates tag i t, g(`ndup')
sort i t
duplicates drop i t W, force
drop `ndup'

* JOB CHANGES
* Builds indicators for a change in j between consecutive records of a person
* (j is presumably the employer/job identifier - confirm against the codebook).
* Declare the panel: unit i, monthly time t
xtset i t
sort i t
cap drop auxj
* auxj = value of j in the previous calendar month (missing if that month is absent)
bys i: gen auxj=l1.j
sort i t
* When the previous calendar month is absent, fall back to the previous row of the same person
replace auxj=j[_n-1] if auxj==. & i[_n]==i[_n-1]
* jobchange = 1 if j differs from the previous record, 0 if equal; missing when auxj is missing
g jobchange=auxj!=j if auxj!=. 
* Integer part of age in years
g agedisc=floor(age)
* Any job change within the person's integer age; set to missing on rows where jobchange is missing
bys i agedisc: egen jobchange_atage=max(jobchange) 
replace jobchange_atage=. if jobchange==.
* Any job change over all of the person's records
bys i: egen anyjobchange=max(jobchange) 


* Cohorts in sample: birth year, restricted to birth months 1941m4 to 1971m3
generate cohort = yofd(Fnac)
keep if birth_month >= tm(1941m4) & birth_month < tm(1971m4)

*** Keep observations from 60 months before to 95 months after the 50th birthday
keep if agemonths_centered >= -60 & agemonths_centered < 96

*** Age in whole years relative to 50: -5, ..., 7.
* Every remaining observation lies in [-60, 96), so flooring the month count
* divided by 12 assigns each 12-month bin [12y, 12y+12) the value y.
generate age_centered = floor(agemonths_centered / 12)

*** 2-digit CIIU
* ciiu2 = first two characters of ciiu written as a string, converted back to a number.
* NOTE(review): if ciiu is stored as a number, codes with a leading zero lose
* it here and get a different two-digit prefix - confirm how ciiu is stored.
cap drop aux
tostring ciiu, g(aux)
g ciiu2=substr(aux,1,2)
destring ciiu2, replace
drop aux

* othempl is the complement of empl
g othempl= empl==0 // these are other status, like hourly and temp workers, on leave, and others

* anyempl equals 1 on every row, so the sample loop below can treat "all observations" like the other groups
g anyempl=1 // this marks all observations

* Maximum number of months each person can appear in the sample, by cohort.
* The age window spans 156 months; cohorts born before the first fully covered
* birth month, or after the last one, lose one month per month of distance.
* NOTE(review): the window ends just before age 58, so with data through 2016
* the last fully observed cohort would be born around 1958, while the
* expression below evaluates to 1960 - confirm the "+1" and the data end date.
local firstyrfull=1996-(45)
local lastyrfull=2016-(57)+1

* Boundary birth months as numeric monthly dates
local first_full = tm(`firstyrfull'm4)
local last_full = tm(`lastyrfull'm3)

generate max_months = 156
replace max_months = 156 - (`first_full' - birth_month) if birth_month < `first_full'
replace max_months = 156 - (birth_month - `last_full') if birth_month > `last_full'


* Select sample
* For each status s: months with s==1 per person, that count as a share of
* max_months, a row-level flag (s==1 and at least 6 such months overall), and
* a person-level flag for having any flagged row.
foreach s in empl othempl anyempl {
	bysort i: egen count_m`s' = total(`s')
	generate prop_m`s' = count_m`s' / max_months
	generate sample_`s' = (`s' == 1 & count_m`s' >= 6)
	bysort i: egen isample_`s' = max(sample_`s')
}

* Person-level "ever" indicators for empl and othempl. Nothing is dropped
* here: the overlap between the empl sample and othempl is only counted.
foreach s in empl othempl {
	bysort i: egen `s'_any2 = max(`s')
}
count if isample_empl == 1 & othempl_any2 == 1

* Cross-check person-level against row-level flags
tab isample_empl sample_empl, m
tab isample_othempl sample_othempl, m
tab isample_anyempl sample_anyempl, m

* Number of persons in each sample (first row of each person, ordered by t)
bysort i (t): generate order = _n
count if isample_empl == 1 & order == 1
count if isample_anyempl == 1 & order == 1

* Drop people in none of the samples
keep if isample_anyempl == 1


* Keep relevant variables
* The list mixes exact names and wildcards; anything it does not match is discarded.
keep i t j year age* prop* *empl* W* remC1_sum remC2_sum remC3_sum amt_* ben ciiu*  Tipocontr ipc  ndep ndep_cat *sample* birth_month *count* cohort max_months Fing Fegr hrsmonth trem_*_max status_*_max *jobchange* any*

****************
* Park the worker panel in a temporary file; it is merged back in further below
tempfile workdata
save `workdata', replace

*Add Min Contribution Base info
* Build the list of distinct months t present in the worker panel and store it
* in a second temporary file
use `workdata', clear
keep t 
duplicates drop 
tempfile time
save `time'

* Read the minimum contribution base table. The header is on row 5 of the CSV
* and the data follow it.
import delimited "`dir_raw'/FICTO UNIPERSONALES.csv", clear rowrange(5) varnames(5)

* The amounts arrive as text with comma separators; convert them to numbers
foreach var in valorbfc mgravado aportebps{
	destring `var', replace ignore(",")
}	

* fvigencia is parsed as a month-year date and converted to a monthly date
g t=mofd(date(fvigencia, "MY",2019))
format t %tm
sort t

* Add every month that appears in the worker panel, so each has a row here
merge 1:1 t using `time'
sort t
* Time-series declaration kept as in the original; the fill below does not use it
tset t

* Carry the last known value forward to months without an entry of their own.
* The fill is positional (previous row after sorting by t), so it also works
* across calendar gaps in t, where a lag-operator fill stops; replace runs in
* row order, so filled values propagate down. Values present in the table are
* never overwritten.
foreach var in valorbfc mgravado aportebps{
	replace `var' = `var'[_n-1] if missing(`var')
}	

* Drop table months that are not in the worker panel, then clean up
drop if _m==1
drop _m fvigencia

* Attach the monthly contribution-base values to every worker record of the
* same month; no merge indicator is kept
merge 1:m t using `workdata', nogenerate

* rficto = mgravado divided by (1000 * ipc)
generate rficto = mgravado/(1000*ipc)
label variable rficto "Minimum Contribution Base in 1000 Pesos of 2015"


* Write the final placebo dataset to the clean folder
save "`dir_clean'/`dataname'", replace


exit


